In [1]:
import pandas as pd
In [8]:
person = pd.read_csv("Motor_Vehicle_Collisions_-_Person.tsv", sep="\t")
In [9]:
person.head()
Out[9]:
UNIQUE_ID COLLISION_ID CRASH_DATE CRASH_TIME PERSON_ID PERSON_TYPE PERSON_INJURY VEHICLE_ID PERSON_AGE EJECTION ... BODILY_INJURY POSITION_IN_VEHICLE SAFETY_EQUIPMENT PED_LOCATION PED_ACTION COMPLAINT PED_ROLE CONTRIBUTING_FACTOR_1 CONTRIBUTING_FACTOR_2 PERSON_SEX
0 10249006 4229554 10/26/2019 9:43 31aa2bc0-f545-444f-8cdb-f1cb5cf00b89 Occupant Unspecified 19141108.0 NaN NaN ... NaN NaN NaN NaN NaN NaN Registrant NaN NaN U
1 10255054 4230587 10/25/2019 15:15 4629e500-a73e-48dc-b8fb-53124d124b80 Occupant Unspecified 19144075.0 33.0 Not Ejected ... Does Not Apply Front passenger, if two or more persons, inclu... Lap Belt & Harness NaN NaN Does Not Apply Passenger NaN NaN F
2 10253177 4230550 10/26/2019 17:55 ae48c136-1383-45db-83f4-2a5eecfb7cff Occupant Unspecified 19143133.0 55.0 NaN ... NaN NaN NaN NaN NaN NaN Registrant NaN NaN M
3 6650180 3565527 11/21/2016 13:05 2782525 Occupant Unspecified NaN NaN NaN ... NaN NaN NaN NaN NaN NaN Notified Person NaN NaN NaN
4 10255516 4231168 10/25/2019 11:16 e038e18f-40fb-4471-99cf-345eae36e064 Occupant Unspecified 19144329.0 7.0 Not Ejected ... Does Not Apply Right rear passenger or motorcycle sidecar pas... Lap Belt NaN NaN Does Not Apply Passenger NaN NaN F

5 rows × 21 columns

In [10]:
vehicles = pd.read_csv("Motor_Vehicle_Collisions_-_Vehicles.tsv", sep="\t")
/var/folders/8q/3zgzcf5j6fbfp97d3vlhbt_m0000gn/T/ipykernel_19993/3959542513.py:1: DtypeWarning: Columns (8) have mixed types. Specify dtype option on import or set low_memory=False.
  vehicles = pd.read_csv("Motor_Vehicle_Collisions_-_Vehicles.tsv", sep="\t")
In [11]:
vehicles.head()
Out[11]:
UNIQUE_ID COLLISION_ID CRASH_DATE CRASH_TIME VEHICLE_ID STATE_REGISTRATION VEHICLE_TYPE VEHICLE_MAKE VEHICLE_MODEL VEHICLE_YEAR ... PRE_CRASH POINT_OF_IMPACT VEHICLE_DAMAGE VEHICLE_DAMAGE_1 VEHICLE_DAMAGE_2 VEHICLE_DAMAGE_3 PUBLIC_PROPERTY_DAMAGE PUBLIC_PROPERTY_DAMAGE_TYPE CONTRIBUTING_FACTOR_1 CONTRIBUTING_FACTOR_2
0 10385780 100201 09/07/2012 9:03 1 NY PASSENGER VEHICLE NaN NaN NaN ... NaN NaN NaN NaN NaN NaN NaN NaN Unspecified NaN
1 19140702 4213082 09/23/2019 8:15 0553ab4d-9500-4cba-8d98-f4d7f89d5856 NY Station Wagon/Sport Utility Vehicle TOYT -CAR/SUV NaN 2002.0 ... Going Straight Ahead Left Front Bumper Left Front Quarter Panel NaN NaN NaN N NaN Driver Inattention/Distraction Unspecified
2 14887647 3307608 10/02/2015 17:18 2 NY TAXI NaN NaN NaN ... Going Straight Ahead NaN NaN NaN NaN NaN NaN NaN Driver Inattention/Distraction NaN
3 14889754 3308693 10/04/2015 20:34 1 NY PASSENGER VEHICLE NaN NaN NaN ... Parked NaN NaN NaN NaN NaN NaN NaN Unspecified NaN
4 14400270 297666 04/25/2013 21:15 1 NY PASSENGER VEHICLE NaN NaN NaN ... NaN NaN NaN NaN NaN NaN NaN NaN Other Vehicular NaN

5 rows × 25 columns

In [23]:
collision = pd.read_csv("bigquery_csv.csv",error_bad_lines=False, engine="python")
/var/folders/8q/3zgzcf5j6fbfp97d3vlhbt_m0000gn/T/ipykernel_19993/868775873.py:1: FutureWarning: The error_bad_lines argument has been deprecated and will be removed in a future version. Use on_bad_lines in the future.


  collision = pd.read_csv("bigquery_csv.csv",error_bad_lines=False, engine="python")
Skipping line 46444: unexpected end of data
In [24]:
collision.head()
Out[24]:
COLLISION_ID collision_dt collision_day collision_time collision_hour collision_dayoftheweek borough zip_code off_street_name on_street_name ... number_of_pedestrians_killed number_of_persons_injured number_of_persons_killed vehicle_type_code1 vehicle_type_code2 vehicle_type_code_3 vehicle_type_code_4 vehicle_type_code_5 DI_JobID DI_CreateDate
0 22 2012-07-01 10:40:00 2012-07-01 10:40:00 10 1 MANHATTAN 10013.0 NaN CANAL STREET ... 0 0 0 SPORT UTILITY / STATION WAGON VAN NaN NaN NaN Fg9FN7 2023-04-17 21:13:50
1 23 2012-07-01 12:18:00 2012-07-01 12:18:00 0 1 MANHATTAN 10004.0 NaN BATTERY PLACE ... 0 0 0 TAXI TAXI NaN NaN NaN Fg9FN7 2023-04-17 21:07:21
2 24 2012-07-01 15:00:00 2012-07-01 15:00:00 3 1 NaN NaN NaN WATER STREET ... 0 0 0 BUS PASSENGER VEHICLE NaN NaN NaN Fg9FN7 2023-04-17 21:23:12
3 25 2012-07-01 18:00:00 2012-07-01 18:00:00 6 1 MANHATTAN 10007.0 NaN WEST STREET ... 0 0 0 PASSENGER VEHICLE PASSENGER VEHICLE NaN NaN NaN Fg9FN7 2023-04-17 21:29:53
4 26 2012-07-01 19:30:00 2012-07-01 19:30:00 7 1 MANHATTAN 10013.0 NaN WEST STREET ... 0 0 0 PASSENGER VEHICLE PASSENGER VEHICLE NaN NaN NaN Fg9FN7 2023-04-17 21:07:25

5 rows × 34 columns

In [12]:
import sys
!{sys.executable} -m pip install pandas-profiling
Requirement already satisfied: pandas-profiling in /Users/prathamesh/opt/anaconda3/lib/python3.9/site-packages (3.6.6)
Requirement already satisfied: ydata-profiling in /Users/prathamesh/opt/anaconda3/lib/python3.9/site-packages (from pandas-profiling) (4.1.2)
Requirement already satisfied: PyYAML<6.1,>=5.0.0 in /Users/prathamesh/opt/anaconda3/lib/python3.9/site-packages (from ydata-profiling->pandas-profiling) (6.0)
Requirement already satisfied: seaborn<0.13,>=0.10.1 in /Users/prathamesh/opt/anaconda3/lib/python3.9/site-packages (from ydata-profiling->pandas-profiling) (0.11.2)
Requirement already satisfied: typeguard<2.14,>=2.13.2 in /Users/prathamesh/opt/anaconda3/lib/python3.9/site-packages (from ydata-profiling->pandas-profiling) (2.13.3)
Requirement already satisfied: requests<2.29,>=2.24.0 in /Users/prathamesh/opt/anaconda3/lib/python3.9/site-packages (from ydata-profiling->pandas-profiling) (2.28.1)
Requirement already satisfied: jinja2<3.2,>=2.11.1 in /Users/prathamesh/opt/anaconda3/lib/python3.9/site-packages (from ydata-profiling->pandas-profiling) (3.1.2)
Requirement already satisfied: pydantic<1.11,>=1.8.1 in /Users/prathamesh/opt/anaconda3/lib/python3.9/site-packages (from ydata-profiling->pandas-profiling) (1.10.7)
Requirement already satisfied: pandas!=1.4.0,<1.6,>1.1 in /Users/prathamesh/opt/anaconda3/lib/python3.9/site-packages (from ydata-profiling->pandas-profiling) (1.4.4)
Requirement already satisfied: numpy<1.24,>=1.16.0 in /Users/prathamesh/opt/anaconda3/lib/python3.9/site-packages (from ydata-profiling->pandas-profiling) (1.21.5)
Requirement already satisfied: htmlmin==0.1.12 in /Users/prathamesh/opt/anaconda3/lib/python3.9/site-packages (from ydata-profiling->pandas-profiling) (0.1.12)
Requirement already satisfied: matplotlib<3.7,>=3.2 in /Users/prathamesh/opt/anaconda3/lib/python3.9/site-packages (from ydata-profiling->pandas-profiling) (3.6.3)
Requirement already satisfied: tqdm<4.65,>=4.48.2 in /Users/prathamesh/opt/anaconda3/lib/python3.9/site-packages (from ydata-profiling->pandas-profiling) (4.64.1)
Requirement already satisfied: imagehash==4.3.1 in /Users/prathamesh/opt/anaconda3/lib/python3.9/site-packages (from ydata-profiling->pandas-profiling) (4.3.1)
Requirement already satisfied: multimethod<1.10,>=1.4 in /Users/prathamesh/opt/anaconda3/lib/python3.9/site-packages (from ydata-profiling->pandas-profiling) (1.9.1)
Requirement already satisfied: statsmodels<0.14,>=0.13.2 in /Users/prathamesh/opt/anaconda3/lib/python3.9/site-packages (from ydata-profiling->pandas-profiling) (0.13.2)
Requirement already satisfied: visions[type_image_path]==0.7.5 in /Users/prathamesh/opt/anaconda3/lib/python3.9/site-packages (from ydata-profiling->pandas-profiling) (0.7.5)
Requirement already satisfied: phik<0.13,>=0.11.1 in /Users/prathamesh/opt/anaconda3/lib/python3.9/site-packages (from ydata-profiling->pandas-profiling) (0.12.3)
Requirement already satisfied: scipy<1.10,>=1.4.1 in /Users/prathamesh/opt/anaconda3/lib/python3.9/site-packages (from ydata-profiling->pandas-profiling) (1.9.1)
Requirement already satisfied: PyWavelets in /Users/prathamesh/opt/anaconda3/lib/python3.9/site-packages (from imagehash==4.3.1->ydata-profiling->pandas-profiling) (1.3.0)
Requirement already satisfied: pillow in /Users/prathamesh/opt/anaconda3/lib/python3.9/site-packages (from imagehash==4.3.1->ydata-profiling->pandas-profiling) (9.2.0)
Requirement already satisfied: attrs>=19.3.0 in /Users/prathamesh/opt/anaconda3/lib/python3.9/site-packages (from visions[type_image_path]==0.7.5->ydata-profiling->pandas-profiling) (21.4.0)
Requirement already satisfied: networkx>=2.4 in /Users/prathamesh/opt/anaconda3/lib/python3.9/site-packages (from visions[type_image_path]==0.7.5->ydata-profiling->pandas-profiling) (2.8.4)
Requirement already satisfied: tangled-up-in-unicode>=0.0.4 in /Users/prathamesh/opt/anaconda3/lib/python3.9/site-packages (from visions[type_image_path]==0.7.5->ydata-profiling->pandas-profiling) (0.2.0)
Requirement already satisfied: MarkupSafe>=2.0 in /Users/prathamesh/opt/anaconda3/lib/python3.9/site-packages (from jinja2<3.2,>=2.11.1->ydata-profiling->pandas-profiling) (2.1.2)
Requirement already satisfied: contourpy>=1.0.1 in /Users/prathamesh/opt/anaconda3/lib/python3.9/site-packages (from matplotlib<3.7,>=3.2->ydata-profiling->pandas-profiling) (1.0.7)
Requirement already satisfied: kiwisolver>=1.0.1 in /Users/prathamesh/opt/anaconda3/lib/python3.9/site-packages (from matplotlib<3.7,>=3.2->ydata-profiling->pandas-profiling) (1.4.2)
Requirement already satisfied: fonttools>=4.22.0 in /Users/prathamesh/opt/anaconda3/lib/python3.9/site-packages (from matplotlib<3.7,>=3.2->ydata-profiling->pandas-profiling) (4.25.0)
Requirement already satisfied: packaging>=20.0 in /Users/prathamesh/opt/anaconda3/lib/python3.9/site-packages (from matplotlib<3.7,>=3.2->ydata-profiling->pandas-profiling) (21.3)
Requirement already satisfied: cycler>=0.10 in /Users/prathamesh/opt/anaconda3/lib/python3.9/site-packages (from matplotlib<3.7,>=3.2->ydata-profiling->pandas-profiling) (0.11.0)
Requirement already satisfied: pyparsing>=2.2.1 in /Users/prathamesh/opt/anaconda3/lib/python3.9/site-packages (from matplotlib<3.7,>=3.2->ydata-profiling->pandas-profiling) (3.0.9)
Requirement already satisfied: python-dateutil>=2.7 in /Users/prathamesh/opt/anaconda3/lib/python3.9/site-packages (from matplotlib<3.7,>=3.2->ydata-profiling->pandas-profiling) (2.8.2)
Requirement already satisfied: pytz>=2020.1 in /Users/prathamesh/opt/anaconda3/lib/python3.9/site-packages (from pandas!=1.4.0,<1.6,>1.1->ydata-profiling->pandas-profiling) (2022.1)
Requirement already satisfied: joblib>=0.14.1 in /Users/prathamesh/opt/anaconda3/lib/python3.9/site-packages (from phik<0.13,>=0.11.1->ydata-profiling->pandas-profiling) (1.1.0)
Requirement already satisfied: typing-extensions>=4.2.0 in /Users/prathamesh/opt/anaconda3/lib/python3.9/site-packages (from pydantic<1.11,>=1.8.1->ydata-profiling->pandas-profiling) (4.3.0)
Requirement already satisfied: certifi>=2017.4.17 in /Users/prathamesh/opt/anaconda3/lib/python3.9/site-packages (from requests<2.29,>=2.24.0->ydata-profiling->pandas-profiling) (2022.9.24)
Requirement already satisfied: urllib3<1.27,>=1.21.1 in /Users/prathamesh/opt/anaconda3/lib/python3.9/site-packages (from requests<2.29,>=2.24.0->ydata-profiling->pandas-profiling) (1.26.11)
Requirement already satisfied: charset-normalizer<3,>=2 in /Users/prathamesh/opt/anaconda3/lib/python3.9/site-packages (from requests<2.29,>=2.24.0->ydata-profiling->pandas-profiling) (2.0.4)
Requirement already satisfied: idna<4,>=2.5 in /Users/prathamesh/opt/anaconda3/lib/python3.9/site-packages (from requests<2.29,>=2.24.0->ydata-profiling->pandas-profiling) (3.3)
Requirement already satisfied: patsy>=0.5.2 in /Users/prathamesh/opt/anaconda3/lib/python3.9/site-packages (from statsmodels<0.14,>=0.13.2->ydata-profiling->pandas-profiling) (0.5.2)
Requirement already satisfied: six in /Users/prathamesh/opt/anaconda3/lib/python3.9/site-packages (from patsy>=0.5.2->statsmodels<0.14,>=0.13.2->ydata-profiling->pandas-profiling) (1.16.0)

[notice] A new release of pip is available: 23.0.1 -> 23.1
[notice] To update, run: pip install --upgrade pip
In [13]:
from pandas_profiling import ProfileReport
/var/folders/8q/3zgzcf5j6fbfp97d3vlhbt_m0000gn/T/ipykernel_19993/2274191625.py:1: DeprecationWarning: `import pandas_profiling` is going to be deprecated by April 1st. Please use `import ydata_profiling` instead.
  from pandas_profiling import ProfileReport
In [25]:
vehicle_profile = ProfileReport(vehicles, title='Motor_Vehicle_Collisions_Vehicles_Report')
person_profile = ProfileReport(person,title='Motor_Vehicle_Collisions_Person_Report')
collision_profile = ProfileReport(collision, title="Big Query Collision Report")
In [17]:
person_profile
Summarize dataset:   0%|          | 0/5 [00:00<?, ?it/s]
Generate report structure:   0%|          | 0/1 [00:00<?, ?it/s]
Render HTML:   0%|          | 0/1 [00:00<?, ?it/s]
Out[17]:

In [18]:
vehicle_profile
Summarize dataset:   0%|          | 0/5 [00:00<?, ?it/s]
Generate report structure:   0%|          | 0/1 [00:00<?, ?it/s]
Render HTML:   0%|          | 0/1 [00:00<?, ?it/s]
Out[18]:

In [26]:
collision_profile
Summarize dataset:   0%|          | 0/5 [00:00<?, ?it/s]
Generate report structure:   0%|          | 0/1 [00:00<?, ?it/s]
Render HTML:   0%|          | 0/1 [00:00<?, ?it/s]
Out[26]: